This code is for the results to the question: Does what caregivers are doing predict how much they engage with their target children?
library(tidyverse)
library(Hmisc)
library(GGally)
library(ppcor)
library(gridExtra)
library(psych)
library(sjPlot)
library(stargazer)
theme_set(theme_bw())
# demographics
demo_english <- read_csv("./data_demo_lena_transcripts/demo_english_ms.csv") %>%
rename(id = ID, hi = HI24Final, momed = Momed) %>%
dplyr::select(id, hi, momed) %>%
mutate(id = as.character(id),
language = "english")
demo_spanish <- read_csv("./data_demo_lena_transcripts/demo_spanish_ms.csv") %>%
rename(id = ID, hi = HI_18, momed = MomEd_25m) %>%
dplyr::select(id, hi, momed) %>%
mutate(id = as.character(id),
language = "spanish")
demo <- rbind(demo_english, demo_spanish)
# freq
# sum across segments, so there are only up to 4 data points per child (1 for each activity group)
freq_reg <- read_csv("./data_demo_lena_transcripts/freq.csv") %>%
filter(activity != "kwalods",
speech == "all") %>%
mutate(activity = recode(activity, "play" = "cc", "conv" = "cc",
"food" = "cc", "routines" = "cc", "gemods" = "non_tcds")) %>%
mutate(id = factor(id),
language = factor(language)) %>%
group_by(id, activity, speaker) %>% # not grouping by segment number because looking across the whole tCDS hour
mutate(tokens_total_4act = sum(tokens),
types_total_4act = sum(types),
dur_min_4act = sum(dur_min)) %>%
distinct(id, activity, language, speaker, tokens_total_4act, types_total_4act, dur_min_4act) %>%
ungroup() %>%
mutate(activity = factor(activity, levels = c("books", "cc", "ac", "non_tcds")))
# subset for adult and child
freq_reg_adults <- freq_reg %>%
filter(speaker == "ADULTS")
freq_reg_child <- freq_reg %>%
filter(speaker == "CHI")
str(freq_reg_adults)
## tibble[,7] [312 × 7] (S3: tbl_df/tbl/data.frame)
## $ id : Factor w/ 90 levels "7292","7352",..: 47 50 52 53 54 55 56 57 61 65 ...
## $ activity : Factor w/ 4 levels "books","cc","ac",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ speaker : chr [1:312] "ADULTS" "ADULTS" "ADULTS" "ADULTS" ...
## $ language : Factor w/ 2 levels "english","spanish": 1 1 1 1 1 1 1 1 1 1 ...
## $ tokens_total_4act: num [1:312] 294 588 438 4607 4463 ...
## $ types_total_4act : num [1:312] 135 199 112 1323 1111 ...
## $ dur_min_4act : num [1:312] 10.12 4.71 6.76 39.92 32.1 ...
str(freq_reg_child)
## tibble[,7] [312 × 7] (S3: tbl_df/tbl/data.frame)
## $ id : Factor w/ 90 levels "7292","7352",..: 47 50 52 53 54 55 56 57 61 65 ...
## $ activity : Factor w/ 4 levels "books","cc","ac",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ speaker : chr [1:312] "CHI" "CHI" "CHI" "CHI" ...
## $ language : Factor w/ 2 levels "english","spanish": 1 1 1 1 1 1 1 1 1 1 ...
## $ tokens_total_4act: num [1:312] 65 58 75 330 131 13 34 322 240 15 ...
## $ types_total_4act : num [1:312] 34 17 36 181 47 7 16 124 89 9 ...
## $ dur_min_4act : num [1:312] 10.12 4.71 6.76 39.92 32.1 ...
freq_hr_child <- read_csv("./data_demo_lena_transcripts/freq.csv") %>%
dplyr::select(-X1) %>%
filter(speech == "all",
speaker == "CHI") %>%
group_by(id) %>%
mutate(tokens_sum_child = sum(tokens),
dur_hr = sum(dur_min)/60,
tokens_hr_child = tokens_sum_child/dur_hr) %>%
distinct(id, language, tokens_hr_child) %>%
ungroup() %>%
mutate(id = as.character(id))
# freq adult
freq_reg_adults_wide <- freq_reg_adults %>%
ungroup() %>%
group_by(id) %>%
mutate(tokens_sum_adult = sum(tokens_total_4act),
dur_hr = sum(dur_min_4act)/60,
tokens_hr_adult = tokens_sum_adult/dur_hr) %>%
full_join(freq_hr_child, by = c("id", "language")) %>%
distinct(id, activity, language, dur_min_4act, tokens_hr_adult, tokens_hr_child) %>%
spread(activity, dur_min_4act) %>%
replace_na(list(books = 0, cc = 0, ac = 0, non_tcds = 0)) %>%
mutate(br_group = ifelse(books == 0, 0, 1),
br_group = as.factor(br_group))
psych::describe(freq_reg_adults_wide$tokens_hr_adult)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 90 3005.01 1274.48 2831.2 2950.56 1172.3 366.26 8302 7935.74 0.8 2.03 134.34
# descriptives
hist(freq_reg_adults_wide$books)
hist(freq_reg_adults_wide$cc)
hist(freq_reg_adults_wide$ac)
hist(freq_reg_adults_wide$non_tcds)
# difference between families who BR vs. non
ggplot(freq_reg_adults_wide, aes(x = br_group, y = tokens_hr_adult)) +
geom_boxplot() +
geom_jitter(alpha = .5)
# t-tests
t.test(tokens_hr_adult ~ br_group,
data = freq_reg_adults_wide, var.equal = TRUE)
##
## Two Sample t-test
##
## data: tokens_hr_adult by br_group
## t = -3.3415, df = 88, p-value = 0.001224
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1359.4204 -345.4796
## sample estimates:
## mean in group 0 mean in group 1
## 2607.197 3459.647
# freq_reg_adults_wide_rmoutlier <- freq_reg_adults_wide %>%
# filter(id != 20056 & id != 7412)
# nrow(freq_reg_adults_wide_rmoutlier)
# models with tokens
m1 <- lm(tokens_hr_adult ~ tokens_hr_child,
data = freq_reg_adults_wide)
m2 <- lm(tokens_hr_adult ~ tokens_hr_child + language,
data = freq_reg_adults_wide)
m3 <- lm(tokens_hr_adult ~ tokens_hr_child + language + books,
data = freq_reg_adults_wide)
m4 <- lm(tokens_hr_adult ~ tokens_hr_child + language + books + cc,
data = freq_reg_adults_wide)
m5 <- lm(tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac,
data = freq_reg_adults_wide)
m6 <- lm(tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac + non_tcds,
data = freq_reg_adults_wide)
anova(m1, m2, m3, m4, m5, m6)
## Analysis of Variance Table
##
## Model 1: tokens_hr_adult ~ tokens_hr_child
## Model 2: tokens_hr_adult ~ tokens_hr_child + language
## Model 3: tokens_hr_adult ~ tokens_hr_child + language + books
## Model 4: tokens_hr_adult ~ tokens_hr_child + language + books + cc
## Model 5: tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac
## Model 6: tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac +
## non_tcds
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 88 125184941
## 2 87 116398481 1 8786460 9.7597 0.002457 **
## 3 86 81225939 1 35172541 39.0686 1.682e-08 ***
## 4 85 75762325 1 5463615 6.0688 0.015825 *
## 5 84 75413805 1 348520 0.3871 0.535520
## 6 83 74722972 1 690833 0.7674 0.383564
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m4)
##
## Call:
## lm(formula = tokens_hr_adult ~ tokens_hr_child + language + books +
## cc, data = freq_reg_adults_wide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1693.9 -577.2 -223.8 519.9 3434.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1774.6234 308.3866 5.755 1.34e-07 ***
## tokens_hr_child 1.3555 0.4259 3.183 0.00204 **
## languagespanish -619.4245 199.5598 -3.104 0.00259 **
## books 76.9906 11.6153 6.628 2.95e-09 ***
## cc 21.9120 8.8503 2.476 0.01528 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 944.1 on 85 degrees of freedom
## Multiple R-squared: 0.4759, Adjusted R-squared: 0.4513
## F-statistic: 19.3 on 4 and 85 DF, p-value: 2.52e-11
# checking if language is a moderator
m4 <- lm(tokens_hr_adult ~ tokens_hr_child + language + books + cc,
data = freq_reg_adults_wide)
m4_intx1 <- lm(tokens_hr_adult ~ tokens_hr_child + (language * books) + cc,
data = freq_reg_adults_wide)
m4_intx2 <- lm(tokens_hr_adult ~ tokens_hr_child + (language * books) + (language * cc),
data = freq_reg_adults_wide)
anova(m4, m4_intx1, m4_intx2)
## Analysis of Variance Table
##
## Model 1: tokens_hr_adult ~ tokens_hr_child + language + books + cc
## Model 2: tokens_hr_adult ~ tokens_hr_child + (language * books) + cc
## Model 3: tokens_hr_adult ~ tokens_hr_child + (language * books) + (language *
## cc)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 85 75762325
## 2 84 72788793 1 2973532 3.4102 0.06836 .
## 3 83 72372375 1 416419 0.4776 0.49145
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m4_intx1)
##
## Call:
## lm(formula = tokens_hr_adult ~ tokens_hr_child + (language *
## books) + cc, data = freq_reg_adults_wide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1797.7 -605.6 -150.2 471.4 2859.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1623.278 314.853 5.156 1.65e-06 ***
## tokens_hr_child 1.374 0.420 3.271 0.00156 **
## languagespanish -389.510 232.639 -1.674 0.09779 .
## books 99.176 16.571 5.985 5.14e-08 ***
## cc 22.690 8.736 2.597 0.01110 *
## languagespanish:books -41.563 22.437 -1.852 0.06748 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 930.9 on 84 degrees of freedom
## Multiple R-squared: 0.4965, Adjusted R-squared: 0.4665
## F-statistic: 16.57 on 5 and 84 DF, p-value: 2.366e-11
# # check on potential outlier - removing these didn't change pattern of findings
# # 20056 - possibly bc high densest hour; 7412 - less clear, but does have 3rd highest cc value
# cooksd <- cooks.distance(m4)
# View(cooksd)
# influential <- as.numeric(names(cooksd)[(cooksd > 4*mean(cooksd, na.rm=T))]) # influential row numbers
# inf_df <- head(freq_reg_adults_wide_rmoutlier[influential, ]) # influential observations
# View(inf_df)
# table
stargazer(m1, m2, m3, m4, type = "text",
star.char = c(".","*","**","***"),
star.cutoffs = c(.1, .05, .01, .001),
notes = c(". p<0.1; * p<0.05; ** p<0.01; *** p<0.001"),
notes.append = F,
digits = 3,
dep.var.labels = c("Word Tokens in the Densest hour"),
covariate.labels=c("Child Tokens per Hour", "Language Group",
"Book Sharing (min)",
"Other-Child-Centered Activities (min)"))
##
## =================================================================================================================================
## Dependent variable:
## -------------------------------------------------------------------------------------------
## Word Tokens in the Densest hour
## (1) (2) (3) (4)
## ---------------------------------------------------------------------------------------------------------------------------------
## Child Tokens per Hour 1.940*** 1.845*** 1.527*** 1.355**
## (0.526) (0.511) (0.433) (0.426)
##
## Language Group -626.536* -623.618** -619.424**
## (244.485) (205.418) (199.560)
##
## Book Sharing (min) 71.738*** 76.991***
## (11.756) (11.615)
##
## Other-Child-Centered Activities (min) 21.912*
## (8.850)
##
## Constant 2,160.060*** 2,514.431*** 2,256.858*** 1,774.623***
## (261.190) (288.588) (246.119) (308.387)
##
## ---------------------------------------------------------------------------------------------------------------------------------
## Observations 90 90 90 90
## R2 0.134 0.195 0.438 0.476
## Adjusted R2 0.124 0.176 0.419 0.451
## Residual Std. Error 1,192.710 (df = 88) 1,156.682 (df = 87) 971.848 (df = 86) 944.098 (df = 85)
## F Statistic 13.621*** (df = 1; 88) 10.525*** (df = 2; 87) 22.353*** (df = 3; 86) 19.297*** (df = 4; 85)
## =================================================================================================================================
## Note: . p<0.1; * p<0.05; ** p<0.01; *** p<0.001
# diagnostics
plot_model(m4, type = "diag")
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
# models with tokens and cc before book sharing - cc doesn't add before book sharing
m3.cc <- lm(tokens_hr_adult ~ tokens_hr_child + language + cc,
data = freq_reg_adults_wide)
anova(m1, m2, m3.cc, m4, m5, m6)
## Analysis of Variance Table
##
## Model 1: tokens_hr_adult ~ tokens_hr_child
## Model 2: tokens_hr_adult ~ tokens_hr_child + language
## Model 3: tokens_hr_adult ~ tokens_hr_child + language + cc
## Model 4: tokens_hr_adult ~ tokens_hr_child + language + books + cc
## Model 5: tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac
## Model 6: tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac +
## non_tcds
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 88 125184941
## 2 87 116398481 1 8786460 9.7597 0.002457 **
## 3 86 114922558 1 1475922 1.6394 0.203973
## 4 85 75762325 1 39160234 43.4980 3.706e-09 ***
## 5 84 75413805 1 348520 0.3871 0.535520
## 6 83 74722972 1 690833 0.7674 0.383564
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m3.cc)
##
## Call:
## lm(formula = tokens_hr_adult ~ tokens_hr_child + language + cc,
## data = freq_reg_adults_wide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1764.9 -734.8 -213.7 677.6 5406.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2277.6447 365.9879 6.223 1.71e-08 ***
## tokens_hr_child 1.7695 0.5158 3.431 0.000927 ***
## languagespanish -624.5021 244.3464 -2.556 0.012352 *
## cc 11.1971 10.6544 1.051 0.296230
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1156 on 86 degrees of freedom
## Multiple R-squared: 0.205, Adjusted R-squared: 0.1773
## F-statistic: 7.393 on 3 and 86 DF, p-value: 0.0001828
summary(m4)
##
## Call:
## lm(formula = tokens_hr_adult ~ tokens_hr_child + language + books +
## cc, data = freq_reg_adults_wide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1693.9 -577.2 -223.8 519.9 3434.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1774.6234 308.3866 5.755 1.34e-07 ***
## tokens_hr_child 1.3555 0.4259 3.183 0.00204 **
## languagespanish -619.4245 199.5598 -3.104 0.00259 **
## books 76.9906 11.6153 6.628 2.95e-09 ***
## cc 21.9120 8.8503 2.476 0.01528 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 944.1 on 85 degrees of freedom
## Multiple R-squared: 0.4759, Adjusted R-squared: 0.4513
## F-statistic: 19.3 on 4 and 85 DF, p-value: 2.52e-11
summary(m6)
##
## Call:
## lm(formula = tokens_hr_adult ~ tokens_hr_child + language + books +
## cc + ac + non_tcds, data = freq_reg_adults_wide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1501.6 -565.1 -220.8 469.5 3389.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3063.4274 5305.5891 -0.577 0.56523
## tokens_hr_child 1.3203 0.4302 3.069 0.00290 **
## languagespanish -664.6020 209.9614 -3.165 0.00217 **
## books 159.0264 89.3238 1.780 0.07868 .
## cc 103.4476 89.0727 1.161 0.24882
## ac 90.3713 91.5556 0.987 0.32648
## non_tcds 78.4925 89.6045 0.876 0.38356
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 948.8 on 83 degrees of freedom
## Multiple R-squared: 0.4831, Adjusted R-squared: 0.4457
## F-statistic: 12.93 on 6 and 83 DF, p-value: 2.897e-10
m5.ods <- lm(tokens_hr_adult ~ tokens_hr_child + language + books + cc + non_tcds,
data = freq_reg_adults_wide)
anova(m1, m2, m3, m4, m5.ods, m6)
## Analysis of Variance Table
##
## Model 1: tokens_hr_adult ~ tokens_hr_child
## Model 2: tokens_hr_adult ~ tokens_hr_child + language
## Model 3: tokens_hr_adult ~ tokens_hr_child + language + books
## Model 4: tokens_hr_adult ~ tokens_hr_child + language + books + cc
## Model 5: tokens_hr_adult ~ tokens_hr_child + language + books + cc + non_tcds
## Model 6: tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac +
## non_tcds
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 88 125184941
## 2 87 116398481 1 8786460 9.7597 0.002457 **
## 3 86 81225939 1 35172541 39.0686 1.682e-08 ***
## 4 85 75762325 1 5463615 6.0688 0.015825 *
## 5 84 75600109 1 162216 0.1802 0.672313
## 6 83 74722972 1 877137 0.9743 0.326480
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# models with ONLY BR families
bronly_tokens <- freq_reg_adults_wide %>%
filter(br_group == 1)
# regressions
m1.bronly <- lm(tokens_hr_adult ~ tokens_hr_child,
data = bronly_tokens)
m2.bronly <- lm(tokens_hr_adult ~ tokens_hr_child + language,
data = bronly_tokens)
m3.bronly <- lm(tokens_hr_adult ~ tokens_hr_child + language + books,
data = bronly_tokens)
m4.bronly <- lm(tokens_hr_adult ~ tokens_hr_child + language + books + cc,
data = bronly_tokens)
m5.bronly <- lm(tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac,
data = bronly_tokens)
m6.bronly <- lm(tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac + non_tcds,
data = bronly_tokens)
anova(m1.bronly, m2.bronly, m3.bronly, m4.bronly, m5.bronly, m6.bronly)
## Analysis of Variance Table
##
## Model 1: tokens_hr_adult ~ tokens_hr_child
## Model 2: tokens_hr_adult ~ tokens_hr_child + language
## Model 3: tokens_hr_adult ~ tokens_hr_child + language + books
## Model 4: tokens_hr_adult ~ tokens_hr_child + language + books + cc
## Model 5: tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac
## Model 6: tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac +
## non_tcds
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 40 71375267
## 2 39 66449426 1 4925841 4.7571 0.03598 *
## 3 38 40082948 1 26366478 25.4633 1.396e-05 ***
## 4 37 39680008 1 402941 0.3891 0.53680
## 5 36 38279155 1 1400852 1.3529 0.25265
## 6 35 36241501 1 2037654 1.9679 0.16948
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m3.bronly)
##
## Call:
## lm(formula = tokens_hr_adult ~ tokens_hr_child + language + books,
## data = bronly_tokens)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1505.9 -784.9 -204.2 566.6 3003.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2351.6180 424.7781 5.536 2.46e-06 ***
## tokens_hr_child 0.9960 0.6479 1.537 0.132
## languagespanish -748.2907 318.1371 -2.352 0.024 *
## books 83.1373 16.6287 5.000 1.33e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1027 on 38 degrees of freedom
## Multiple R-squared: 0.4519, Adjusted R-squared: 0.4086
## F-statistic: 10.44 on 3 and 38 DF, p-value: 3.792e-05
# checking if language is a moderator
m3.bronly_intx <- lm(tokens_hr_adult ~ tokens_hr_child + (language * books),
data = bronly_tokens)
anova(m3.bronly, m3.bronly_intx)
## Analysis of Variance Table
##
## Model 1: tokens_hr_adult ~ tokens_hr_child + language + books
## Model 2: tokens_hr_adult ~ tokens_hr_child + (language * books)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 38 40082948
## 2 37 36928936 1 3154013 3.1601 0.08368 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# table
stargazer(m1.bronly, m2.bronly, m3.bronly, type = "text",
star.char = c(".","*","**","***"),
star.cutoffs = c(.1, .05, .01, .001),
notes = c(". p<0.1; * p<0.05; ** p<0.01; *** p<0.001"),
notes.append = F,
digits = 3,
dep.var.labels = c("Word Tokens in the Densest hour"),
covariate.labels=c("Child Tokens per Hour", "Language Group",
"Book Sharing (min)"))
##
## ====================================================================================
## Dependent variable:
## --------------------------------------------------------------
## Word Tokens in the Densest hour
## (1) (2) (3)
## ------------------------------------------------------------------------------------
## Child Tokens per Hour 0.834 0.919 0.996
## (0.841) (0.823) (0.648)
##
## Language Group -686.979. -748.291*
## (404.033) (318.137)
##
## Book Sharing (min) 83.137***
## (16.629)
##
## Constant 3,054.460*** 3,340.207*** 2,351.618***
## (457.731) (477.810) (424.778)
##
## ------------------------------------------------------------------------------------
## Observations 42 42 42
## R2 0.024 0.091 0.452
## Adjusted R2 -0.0004 0.045 0.409
## Residual Std. Error 1,335.807 (df = 40) 1,305.309 (df = 39) 1,027.042 (df = 38)
## F Statistic 0.983 (df = 1; 40) 1.960 (df = 2; 39) 10.443*** (df = 3; 38)
## ====================================================================================
## Note: . p<0.1; * p<0.05; ** p<0.01; *** p<0.001
# plot model
plot_model(m3.bronly, type = "pred", terms = c("books"),
show.data = T,
dot.size = 4,
colors = "darkviolet") +
aes(color = group) +
font_size(axis_title.x = 25, axis_title.y = 25, labels.x = 18, labels.y = 18) +
labs(x = "Book Sharing Duration (min)", y = "Caregiver\nTotal Word Tokens\n(Densest tCDS Hour)", title = "") +
theme(axis.title.y = element_text(angle = 0, vjust = 0.5),
legend.position = "none")
ggsave("./figures/balloon_bronly.pdf", width = 12, height = 8, units = "in", dpi = 300)
# model with ONLY BR families and cc first
m3.bronly.cc <- lm(tokens_hr_adult ~ tokens_hr_child + language + cc,
data = bronly_tokens)
anova(m1.bronly, m2.bronly, m3.bronly.cc, m4.bronly, m5.bronly, m6.bronly)
## Analysis of Variance Table
##
## Model 1: tokens_hr_adult ~ tokens_hr_child
## Model 2: tokens_hr_adult ~ tokens_hr_child + language
## Model 3: tokens_hr_adult ~ tokens_hr_child + language + cc
## Model 4: tokens_hr_adult ~ tokens_hr_child + language + books + cc
## Model 5: tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac
## Model 6: tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac +
## non_tcds
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 40 71375267
## 2 39 66449426 1 4925841 4.7571 0.03598 *
## 3 38 65524463 1 924963 0.8933 0.35107
## 4 37 39680008 1 25844455 24.9591 1.624e-05 ***
## 5 36 38279155 1 1400852 1.3529 0.25265
## 6 35 36241501 1 2037654 1.9679 0.16948
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# model with ONLY BR families and non_tCDS above ac
m5.bronly.ods <- lm(tokens_hr_adult ~ tokens_hr_child + language + books + cc + non_tcds,
data = bronly_tokens)
anova(m1.bronly, m2.bronly, m3.bronly, m4.bronly, m5.bronly.ods, m6.bronly)
## Analysis of Variance Table
##
## Model 1: tokens_hr_adult ~ tokens_hr_child
## Model 2: tokens_hr_adult ~ tokens_hr_child + language
## Model 3: tokens_hr_adult ~ tokens_hr_child + language + books
## Model 4: tokens_hr_adult ~ tokens_hr_child + language + books + cc
## Model 5: tokens_hr_adult ~ tokens_hr_child + language + books + cc + non_tcds
## Model 6: tokens_hr_adult ~ tokens_hr_child + language + books + cc + ac +
## non_tcds
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 40 71375267
## 2 39 66449426 1 4925841 4.7571 0.03598 *
## 3 38 40082948 1 26366478 25.4633 1.396e-05 ***
## 4 37 39680008 1 402941 0.3891 0.53680
## 5 36 39081454 1 598553 0.5780 0.45217
## 6 35 36241501 1 2839953 2.7427 0.10664
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# diagnostics
plot_model(m3.bronly, type = "diag")
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
# models with NON BR families
nonbr_tokens <- freq_reg_adults_wide %>%
filter(br_group == 0)
# regressions
m1.nonbr <- lm(tokens_hr_adult ~ tokens_hr_child,
data = nonbr_tokens)
m2.nonbr <- lm(tokens_hr_adult ~ tokens_hr_child + language,
data = nonbr_tokens)
m3.nonbr <- lm(tokens_hr_adult ~ tokens_hr_child + language + cc,
data = nonbr_tokens)
m4.nonbr <- lm(tokens_hr_adult ~ tokens_hr_child + language + cc + ac,
data = nonbr_tokens)
m5.nonbr <- lm(tokens_hr_adult ~ tokens_hr_child + language + cc + ac + non_tcds,
data = nonbr_tokens)
anova(m1.nonbr, m2.nonbr, m3.nonbr, m4.nonbr, m5.nonbr)
## Analysis of Variance Table
##
## Model 1: tokens_hr_adult ~ tokens_hr_child
## Model 2: tokens_hr_adult ~ tokens_hr_child + language
## Model 3: tokens_hr_adult ~ tokens_hr_child + language + cc
## Model 4: tokens_hr_adult ~ tokens_hr_child + language + cc + ac
## Model 5: tokens_hr_adult ~ tokens_hr_child + language + cc + ac + non_tcds
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 46 39920314
## 2 45 37515133 1 2405181 3.1538 0.08299 .
## 3 44 32693714 1 4821419 6.3222 0.01584 *
## 4 43 32550184 1 143529 0.1882 0.66664
## 5 42 32030174 1 520010 0.6819 0.41361
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m3.nonbr)
##
## Call:
## lm(formula = tokens_hr_adult ~ tokens_hr_child + language + cc,
## data = nonbr_tokens)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1283.63 -578.33 -95.85 455.84 1863.78
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1473.962 353.865 4.165 0.000143 ***
## tokens_hr_child 1.955 0.581 3.364 0.001599 **
## languagespanish -455.133 253.398 -1.796 0.079343 .
## cc 24.509 9.621 2.547 0.014427 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 862 on 44 degrees of freedom
## Multiple R-squared: 0.4072, Adjusted R-squared: 0.3668
## F-statistic: 10.08 on 3 and 44 DF, p-value: 3.568e-05
summary(m4.nonbr)
##
## Call:
## lm(formula = tokens_hr_adult ~ tokens_hr_child + language + cc +
## ac, data = nonbr_tokens)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1289.10 -627.14 -65.08 469.58 1932.32
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1637.5147 518.3136 3.159 0.00289 **
## tokens_hr_child 2.0199 0.6052 3.337 0.00175 **
## languagespanish -426.3603 264.1618 -1.614 0.11384
## cc 20.8609 12.8250 1.627 0.11113
## ac -9.8591 22.6418 -0.435 0.66542
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 870 on 43 degrees of freedom
## Multiple R-squared: 0.4098, Adjusted R-squared: 0.3549
## F-statistic: 7.465 on 4 and 43 DF, p-value: 0.0001168
# checking if language is a moderator
m3.nonbr_intx <- lm(tokens_hr_adult ~ tokens_hr_child + (language * cc),
data = nonbr_tokens)
anova(m3.nonbr, m3.nonbr_intx)
## Analysis of Variance Table
##
## Model 1: tokens_hr_adult ~ tokens_hr_child + language + cc
## Model 2: tokens_hr_adult ~ tokens_hr_child + (language * cc)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 44 32693714
## 2 43 30895448 1 1798266 2.5028 0.121
# table
stargazer(m1.nonbr, m2.nonbr, m3.nonbr, m4.nonbr, type = "text",
star.char = c(".","*","**","***"),
star.cutoffs = c(.1, .05, .01, .001),
notes = c(". p<0.1; * p<0.05; ** p<0.01; *** p<0.001"),
notes.append = F,
digits = 3,
dep.var.labels = c("Word Tokens in the Densest hour"),
covariate.labels=c("Child Tokens per Hour", "Language Group",
"Other Child-Centered Activities (min)",
"Adult-Centered Activities (min)"))
##
## ================================================================================================================================
## Dependent variable:
## ------------------------------------------------------------------------------------------
## Word Tokens in the Densest hour
## (1) (2) (3) (4)
## --------------------------------------------------------------------------------------------------------------------------------
## Child Tokens per Hour 2.506*** 2.319*** 1.955** 2.020**
## (0.598) (0.596) (0.581) (0.605)
##
## Language Group -455.901. -455.133. -426.360
## (268.407) (253.398) (264.162)
##
## Other Child-Centered Activities (min) 24.509* 20.861
## (9.621) (12.825)
##
## Adult-Centered Activities (min) -9.859
## (22.642)
##
## Constant 1,626.145*** 1,936.715*** 1,473.962*** 1,637.515**
## (270.010) (321.664) (353.866) (518.314)
##
## --------------------------------------------------------------------------------------------------------------------------------
## Observations 48 48 48 48
## R2 0.276 0.320 0.407 0.410
## Adjusted R2 0.260 0.290 0.367 0.355
## Residual Std. Error 931.575 (df = 46) 913.055 (df = 45) 861.997 (df = 44) 870.047 (df = 43)
## F Statistic 17.555*** (df = 1; 46) 10.580*** (df = 2; 45) 10.076*** (df = 3; 44) 7.465*** (df = 4; 43)
## ================================================================================================================================
## Note: . p<0.1; * p<0.05; ** p<0.01; *** p<0.001
# plot model
plot_model(m3.nonbr, type = "pred", terms = c("cc"),
show.data = T,
dot.size = 4,
colors = "firebrick1") +
aes(color = group) +
font_size(axis_title.x = 25, axis_title.y = 25, labels.x = 18, labels.y = 18) +
labs(x = "Other Child-Centered Duration (min)", y = "Caregiver\nTotal Word Tokens\n(Densest tCDS Hour)", title = "") +
theme(axis.title.y = element_text(angle = 0, vjust = 0.5),
legend.position = "none")
ggsave("./figures/balloon_nonbronly.pdf", width = 12, height = 8, units = "in", dpi = 300)
# model with NON BR families and ac above cc; p = .06
m3.nonbr.ac <- lm(tokens_hr_adult ~ tokens_hr_child + language + ac,
data = nonbr_tokens)
anova(m1.nonbr, m2.nonbr, m3.nonbr.ac, m4.nonbr, m5.nonbr)
## Analysis of Variance Table
##
## Model 1: tokens_hr_adult ~ tokens_hr_child
## Model 2: tokens_hr_adult ~ tokens_hr_child + language
## Model 3: tokens_hr_adult ~ tokens_hr_child + language + ac
## Model 4: tokens_hr_adult ~ tokens_hr_child + language + cc + ac
## Model 5: tokens_hr_adult ~ tokens_hr_child + language + cc + ac + non_tcds
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 46 39920314
## 2 45 37515133 1 2405181 3.1538 0.08299 .
## 3 44 34552984 1 2962148 3.8842 0.05536 .
## 4 43 32550184 1 2002800 2.6262 0.11260
## 5 42 32030174 1 520010 0.6819 0.41361
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m4.nonbr)
##
## Call:
## lm(formula = tokens_hr_adult ~ tokens_hr_child + language + cc +
## ac, data = nonbr_tokens)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1289.10 -627.14 -65.08 469.58 1932.32
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1637.5147 518.3136 3.159 0.00289 **
## tokens_hr_child 2.0199 0.6052 3.337 0.00175 **
## languagespanish -426.3603 264.1618 -1.614 0.11384
## cc 20.8609 12.8250 1.627 0.11113
## ac -9.8591 22.6418 -0.435 0.66542
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 870 on 43 degrees of freedom
## Multiple R-squared: 0.4098, Adjusted R-squared: 0.3549
## F-statistic: 7.465 on 4 and 43 DF, p-value: 0.0001168
# model with NON BR families and gemods above ac
m4.nonbr.ods <- lm(tokens_hr_adult ~ tokens_hr_child + language + cc + non_tcds,
data = nonbr_tokens)
anova(m1.nonbr, m2.nonbr, m3.nonbr, m4.nonbr.ods, m5.nonbr)
## Analysis of Variance Table
##
## Model 1: tokens_hr_adult ~ tokens_hr_child
## Model 2: tokens_hr_adult ~ tokens_hr_child + language
## Model 3: tokens_hr_adult ~ tokens_hr_child + language + cc
## Model 4: tokens_hr_adult ~ tokens_hr_child + language + cc + non_tcds
## Model 5: tokens_hr_adult ~ tokens_hr_child + language + cc + ac + non_tcds
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 46 39920314
## 2 45 37515133 1 2405181 3.1538 0.08299 .
## 3 44 32693714 1 4821419 6.3222 0.01584 *
## 4 43 32628821 1 64892 0.0851 0.77195
## 5 42 32030174 1 598647 0.7850 0.38067
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# correlations
ggplot(nonbr_tokens, aes(cc, ac, color = language)) +
geom_point() +
geom_smooth(method = "lm")
cor.test(nonbr_tokens$cc, nonbr_tokens$ac)
##
## Pearson's product-moment correlation
##
## data: nonbr_tokens$cc and nonbr_tokens$ac
## t = -5.3799, df = 46, p-value = 2.432e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.7696821 -0.4096571
## sample estimates:
## cor
## -0.6214557
# diagnostics
plot_model(m3.nonbr, type = "diag")
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]